library(dplyr)
library(lfe)
library(sjPlot)
library(stargazer)
library(ggplot2)
library(lubridate)
library(ggpattern)

### Adjust the working directory / file locations before running.
# The empty path below is a placeholder: replace it with the folder that holds
# the Results_*.csv input files. Every read.csv() and ggsave() call below uses
# a bare file name, so inputs are read from, and figures are written to, this
# working directory.
setwd("")

#################### English Plots (Figure 1)
# Accuracy per coder for USA tweets, one facet per classification test.

percents <- read.csv("Results_Percentages_Combined.csv")

# On some R versions the first CSV column (Test) is read with stray leading
# characters in front of "..Test" in its name. Ignore unless needed; if the
# Test column is missing, restore it here before the filtering step, e.g.
#   names(percents)[1] <- "Test"   # NOTE(review): assumes Test is column 1

# Keep only the instructed USA tweet results, then fix the display order of
# the coders, the tests and the text types.
percents <- percents %>%
  filter(
    Country == "USA",
    Group != "GPT No Instructions",
    Text == "Tweets"
  ) %>%
  mutate(
    Name = factor(
      Name,
      levels = c("GPT Run 1", "GPT Run 2", "Hybrid Coding")
    ),
    Test = factor(
      Test,
      levels = c("Political", "Negative", "Sentiment", "Ideology")
    ),
    Text = factor(Text, levels = c("Tweets", "Articles"))
  )

# Bars show Accuracy as given in the data; the value is printed in white just
# below the top of each bar.
ggplot(percents, aes(x = Name, y = Accuracy, fill = Group)) +
  geom_col() +
  geom_text(
    aes(label = Accuracy),
    vjust = 1.6, color = "white", size = 3.5
  ) +
  scale_fill_manual(values = c("grey25", "grey50")) +
  labs(x = "", y = "Percentage of Accurate Classifications (USA)") +
  facet_wrap(~Test, ncol = 4) +
  theme_bw() +
  theme(
    legend.position = "bottom",
    axis.text.x = element_text(angle = 90, vjust = 0.5)
  )
# Saves the most recently created plot.
ggsave("CodingAccuracy Plot.png")



############# USA Text Type (Figure 2)
# Accuracy per coder for USA data, comparing tweets and articles on the
# Political and Negative tests.

percents <- read.csv("Results_Percentages_Combined.csv")

# On some R versions the first CSV column (Test) is read with stray leading
# characters in front of "..Test" in its name. If the Test column is missing,
# restore it here before the filtering step, e.g.
#   names(percents)[1] <- "Test"   # NOTE(review): assumes Test is column 1

# Keep instructed USA results, drop the Sentiment and Ideology tests, then fix
# the display order of coders, tests and text types.
percents <- percents %>%
  filter(
    Country == "USA",
    Group != "GPT No Instructions",
    Test != "Sentiment",
    Test != "Ideology"
  ) %>%
  mutate(
    Name = factor(
      Name,
      levels = c("GPT Run 1", "GPT Run 2", "Hybrid Coding")
    ),
    Test = factor(Test, levels = c("Political", "Negative")),
    Text = factor(Text, levels = c("Tweets", "Articles"))
  )

# Bars are dodged by text type: fill colour encodes Group, the pattern encodes
# Text (first level plain, second level striped).
ggplot(percents, aes(x = Name, y = Accuracy, fill = Group)) +
  geom_bar_pattern(
    aes(pattern = Text),
    stat = "identity",
    position = "dodge",
    colour = 'black',
    pattern_colour = 'black',
    pattern_fill = 'black',
    pattern_density = 0.2
  ) +
  # Labels are grouped by Text so they dodge together with their bars.
  geom_text(
    aes(label = Accuracy, group = Text),
    position = position_dodge(width = .9),
    vjust = 1.6, color = "white", size = 3.5
  ) +
  scale_fill_manual(values = c("grey25", "grey50"), name = "Coding") +
  scale_pattern_manual(values = c('none', 'stripe'), name = "Text type") +
  labs(x = "", y = "Percentage of Accurate Classifications (USA)") +
  facet_wrap(~Test) +
  theme_bw() +
  theme(
    legend.position = "bottom",
    axis.text.x = element_text(angle = 90, vjust = 0.5)
  ) +
  # Pattern legend keys are drawn on a white fill; fill legend keys are drawn
  # without any pattern.
  guides(
    pattern = guide_legend(
      override.aes = list(fill = "white"),
      order = 2
    ),
    fill = guide_legend(
      override.aes = list(pattern = c("none", "none"))
    )
  )
# Saves the most recently created plot.
ggsave("CodingAccuracy Plot TextLength.png")


########################
########### Non-USA (Figure 3)
# Accuracy per coder for every country other than the USA, one facet per
# Country/Test combination.

percents <- read.csv("Results_Percentages_Combined.csv")

# On some R versions the first CSV column (Test) is read with stray leading
# characters in front of "..Test" in its name. If the Test column is missing,
# restore it here before the factor step, e.g.
#   names(percents)[1] <- "Test"   # NOTE(review): assumes Test is column 1

# Keep the instructed non-USA results, then fix the display order of coders,
# tests and text types.
percents <- percents %>%
  filter(
    Country != "USA",
    Group != "GPT No Instructions"
  ) %>%
  mutate(
    Name = factor(
      Name,
      levels = c("GPT Run 1", "GPT Run 2", "Hybrid Coding")
    ),
    Test = factor(
      Test,
      levels = c("Political", "Negative", "Sentiment", "Ideology")
    ),
    Text = factor(Text, levels = c("Tweets", "Articles"))
  )

ggplot(percents, aes(x = Name, y = Accuracy, fill = Group)) +
  geom_col() +
  geom_text(
    aes(label = Accuracy),
    vjust = 1.6, color = "white", size = 3.5
  ) +
  scale_fill_manual(values = c("grey25", "grey50")) +
  labs(x = "", y = "Percentage of Accurate Classifications (Non-English)") +
  facet_wrap(~ Country + Test, ncol = 4) +
  theme_bw() +
  theme(
    legend.position = "bottom",
    axis.text.x = element_text(angle = 90, vjust = 0.5)
  )
# Explicit size (in ggsave's default units) for the multi-row facet grid.
ggsave("CodingAccuracy Plot International.png", width = 9.95, height = 10)




########################################Appendix
#########################F1 Scores
#################### English Plots
# F1 score per coder for USA tweets, one facet per classification test.

percents <- read.csv("Results_F1_Combined.csv")

# On some R versions the first CSV column (Test) is read with stray leading
# characters in front of "..Test" in its name. If the Test column is missing,
# restore it here before the factor step, e.g.
#   names(percents)[1] <- "Test"   # NOTE(review): assumes Test is column 1

# Keep only the instructed USA tweet results, then fix the display order of
# coders, tests and text types.
percents <- percents %>%
  filter(
    Country == "USA",
    Group != "GPT_NoInstructions",
    Text == "Tweets"
  ) %>%
  mutate(
    Name = factor(
      Name,
      levels = c("GPT Run 1", "GPT Run 2", "Hybrid Coding")
    ),
    Test = factor(
      Test,
      levels = c("Political", "Negative", "Sentiment", "Ideology")
    ),
    Text = factor(Text, levels = c("Tweets", "Articles"))
  )

ggplot(percents, aes(x = Name, y = F1, fill = Group)) +
  geom_col() +
  geom_text(
    aes(label = F1),
    vjust = 1.6, color = "white", size = 3.5
  ) +
  scale_fill_manual(values = c("grey25", "grey50")) +
  labs(x = "", y = "Macro F1 Score (USA)") +
  facet_wrap(~Test, ncol = 4) +
  theme_bw() +
  theme(
    legend.position = "bottom",
    axis.text.x = element_text(angle = 90, vjust = 0.5)
  )
# Saves the most recently created plot.
ggsave("CodingF1 Plot.png")



############# USA Text Type
# F1 score per coder for USA data, comparing tweets and articles on the
# Political and Negative tests.

percents <- read.csv("Results_F1_Combined.csv")

# On some R versions the first CSV column (Test) is read with stray leading
# characters in front of "..Test" in its name. If the Test column is missing,
# restore it here before the filtering step, e.g.
#   names(percents)[1] <- "Test"   # NOTE(review): assumes Test is column 1

# Keep instructed USA results, drop the Sentiment and Ideology tests, then fix
# the display order of coders, tests and text types.
percents <- percents %>%
  filter(
    Country == "USA",
    Group != "GPT_NoInstructions",
    Test != "Sentiment",
    Test != "Ideology"
  ) %>%
  mutate(
    Name = factor(
      Name,
      levels = c("GPT Run 1", "GPT Run 2", "Hybrid Coding")
    ),
    Test = factor(Test, levels = c("Political", "Negative")),
    Text = factor(Text, levels = c("Tweets", "Articles"))
  )

# Bars are dodged by text type: fill colour encodes Group, the pattern encodes
# Text (first level plain, second level striped).
ggplot(percents, aes(x = Name, y = F1, fill = Group)) +
  geom_bar_pattern(
    aes(pattern = Text),
    stat = "identity",
    position = "dodge",
    colour = 'black',
    pattern_colour = 'black',
    pattern_fill = 'black',
    pattern_density = 0.2
  ) +
  # Labels are grouped by Text so they dodge together with their bars.
  geom_text(
    aes(label = F1, group = Text),
    position = position_dodge(width = .9),
    vjust = 1.6, color = "white", size = 3.5
  ) +
  scale_fill_manual(values = c("grey25", "grey50"), name = "Coding") +
  scale_pattern_manual(values = c('none', 'stripe'), name = "Text type") +
  # The y variable is F1, so label the axis as an F1 score (the previous label
  # was copied from the accuracy plot).
  labs(x = "", y = "Macro F1 Score (USA)") +
  facet_wrap(~Test) +
  theme_bw() +
  theme(
    legend.position = "bottom",
    axis.text.x = element_text(angle = 90, vjust = 0.5)
  ) +
  # Pattern legend keys are drawn on a white fill with their own pattern; fill
  # legend keys are drawn without any pattern.
  guides(
    pattern = guide_legend(
      override.aes = list(fill = "white", pattern = c('none', 'stripe')),
      order = 2
    ),
    fill = guide_legend(
      override.aes = list(pattern = c("none", "none"))
    )
  )
# Saves the most recently created plot.
ggsave("CodingF1 Plot TextLength.png")


############# NON-US f1
# F1 score per coder for every country other than the USA, one facet per
# Country/Test combination.

percents <- read.csv("Results_F1_Combined.csv")

# On some R versions the first CSV column (Test) is read with stray leading
# characters in front of "..Test" in its name. If the Test column is missing,
# restore it here before the factor step, e.g.
#   names(percents)[1] <- "Test"   # NOTE(review): assumes Test is column 1

# Keep the instructed non-USA results. The other plots built from this F1 file
# exclude the group spelled "GPT_NoInstructions", while this one used
# "GPT No Instructions"; both spellings are excluded here so the filter works
# whichever one the CSV contains. Rows with a missing Group are dropped, as
# they were by the original `!=` comparison.
percents <- percents %>%
  filter(
    Country != "USA",
    !is.na(Group),
    !Group %in% c("GPT No Instructions", "GPT_NoInstructions")
  ) %>%
  mutate(
    Name = factor(
      Name,
      levels = c("GPT Run 1", "GPT Run 2", "Hybrid Coding")
    ),
    Test = factor(
      Test,
      levels = c("Political", "Negative", "Sentiment", "Ideology")
    ),
    Text = factor(Text, levels = c("Tweets", "Articles"))
  )

ggplot(percents, aes(x = Name, y = F1, fill = Group)) +
  geom_col() +
  geom_text(
    aes(label = F1),
    vjust = 1.6, color = "white", size = 3.5
  ) +
  scale_fill_manual(values = c("grey25", "grey50")) +
  labs(x = "", y = "Macro F1 Score (Non-English)") +
  facet_wrap(~ Country + Test, ncol = 4) +
  theme_bw() +
  theme(
    legend.position = "bottom",
    axis.text.x = element_text(angle = 90, vjust = 0.5)
  )
# Explicit size (in ggsave's default units) for the multi-row facet grid.
ggsave("CodingF1 Plot International.png", width = 9.95, height = 10)



########################
########### Coding Instructions
# Accuracy per coder for USA tweets, including the GPT runs without
# instructions; the Ideology test is left out.

percents <- read.csv("Results_Percentages_Combined.csv")

# On some R versions the first CSV column (Test) is read with stray leading
# characters in front of "..Test" in its name. If the Test column is missing,
# restore it here before the filtering step, e.g.
#   names(percents)[1] <- "Test"   # NOTE(review): assumes Test is column 1

# Keep USA tweet results for every group, drop the Ideology test, then fix the
# display order of coders, tests and text types.
percents <- percents %>%
  filter(
    Country == "USA",
    Test != "Ideology",
    Text == "Tweets"
  ) %>%
  mutate(
    Name = factor(
      Name,
      levels = c(
        "GPT Run 1",
        "GPT Run 2",
        "GPT Run 1 (No Instructions)",
        "GPT Run 2 (No Instructions)",
        "Hybrid Coding"
      )
    ),
    Test = factor(
      Test,
      levels = c("Political", "Negative", "Sentiment", "Ideology")
    ),
    Text = factor(Text, levels = c("Tweets", "Articles"))
  )

# Three fill colours here, since the no-instructions group is not filtered out.
ggplot(percents, aes(x = Name, y = Accuracy, fill = Group)) +
  geom_col() +
  geom_text(
    aes(label = Accuracy),
    vjust = 1.6, color = "white", size = 3.5
  ) +
  scale_fill_manual(values = c("grey25", "grey75", "grey50")) +
  labs(x = "", y = "Percentage of Accurate Classifications (USA)") +
  facet_wrap(~ Country + Test, ncol = 4) +
  theme_bw() +
  theme(
    legend.position = "bottom",
    axis.text.x = element_text(angle = 90, vjust = 0.5)
  )
# Saves the most recently created plot.
ggsave("NoInstructions Plot.png")


